##########################################################################################

library(data.table)
library(optparse)
library(Seurat)
library(ArchR)
library(ggsci)

##########################################################################################
option_list <- list(
    make_option(c("--comine_data_file"), type = "character"),
    make_option(c("--rna_file"), type = "character"),
    make_option(c("--out_path"), type = "character") 
)

if(1!=1){
    
    ## 整合atac和rna的文件
    comine_data_file <- "~/20231121_singleMuti/results/atac_res/testis_combined_peak.combineRNA.Rdata"

    ## 单细胞表达文件
    rna_file <- "~/20231121_singleMuti/input/testis_combined.annotationCellType.Rdata"

    ## 输出
    out_path <- "~/20231121_singleMuti/results/qc_atac"

}


###########################################################################################
parseobj <- OptionParser(option_list=option_list, usage = "usage: Rscript %prog [options]")
opt <- parse_args(parseobj)
print(opt)

comine_data_file <- opt$comine_data_file
rna_file <- opt$rna_file
out_path <- opt$out_path

dir.create(out_path , recursive = T)

###########################################################################################

a <- load(comine_data_file)
## testis_combined_peak_combineRNA

b <- load(rna_file)
## scrnat

## 提取细胞
tmp <- testis_combined_peak_combineRNA@cellColData
table(subset(tmp , Clusters=="C15", "cell_type"))
#    Differenting&Differented SPG                    Diplotene 
#                          63                          132 
#   Early stage of spermatids            Endothelial cells 
#                          96                           20 
#                   Leptotene                 Leydig cells 
#                          80                           16 
#                 Macrophages                  Myoid cells 
#                           6                          101 
#                  Patchytene                    Pericytes 
#                         601                            3 
#        Round&ElongateS.tids                Sertoli cells 
#                          84                           32 
#                       Sperm                          SSC 
#                          66                           75 
#                    Zygotene 
#                          34 
## 去除15簇细胞
qc_cells <- rownames(subset(tmp , Clusters=="C15" ))
subCells <- rownames(tmp)[!(rownames(tmp) %in% qc_cells)]
cluster15_cells <- rownames(subset(tmp , Clusters=="C15"))

projHeme5 <- subsetArchRProject(
  ArchRProj = testis_combined_peak_combineRNA,
  cells = subCells,
  outputDirectory = out_path,
  dropCells = TRUE,
  force = TRUE
)


###########################################################################################
## rna标记这簇细胞在哪里
subCells_rna <- gsub( "#" , "_" , subCells )
cluster15_cells_rna <- gsub( "#" , "_" , cluster15_cells )

tmp <- scrnat
tmp$qc_atac <- ifelse( tmp$cell %in% subCells_rna , "PASS" , "NO" )
tmp$cluster15 <- ifelse( tmp$cell %in% cluster15_cells_rna , "YES" , "NO" )

## 画聚类图 ##
out_file <- paste0( out_path , "/magic3_cluster.forqclook.pdf" ) 
pdf(file=out_file,width=8,heigh=6)
DimPlot(tmp, reduction = "umap", label = TRUE, group.by = 'cell_type' )
DimPlot(tmp, reduction = "umap", label = TRUE, group.by = 'qc_atac' )
DimPlot(tmp, reduction = "umap", label = TRUE, group.by = 'cluster15' )
DimPlot(tmp, reduction = "umap", label = TRUE, group.by = 'cluster15' )
dev.off()


###########################################################################################
#### 上面为质控，下面为重新画图
## 细胞颜色
use_colors <- c(pal_npg("nrc")(10) , pal_jco("default")(6))
#names(use_colors) <- unique(scrnat$cell_type)
names(use_colors) <- unique(scrnat$cell_type)


###########################################################################################
## 重新聚类
projHeme5 <- addUMAP(projHeme5,saveModel = FALSE , force = TRUE)

## atac的聚类图,标记细胞类型
p <- plotEmbedding(ArchRProj = projHeme5, colorBy = "cellColData", name = "cell_type", embedding = "UMAP", pal=use_colors)
#p1 <- plotEmbedding(ArchRProj = projHeme5, colorBy = "cellColData", name = "Clusters", embedding = "UMAP")
p2 <- plotEmbedding(ArchRProj = projHeme5, colorBy = "cellColData", name = "Sample", embedding = "UMAP")

image_name <- paste0( out_path , "/plotEmbedding.atac.qc.pdf")
pdf(image_name , width = 10, height = 8)
print(p)
print(p2)
dev.off()

testis_combined_peak_combineRNA <- projHeme5
image_name <- paste0( out_path , "/testis_combined_peak.combineRNA.qc.Rdata")
save(  testis_combined_peak_combineRNA , file = image_name )

###########################################################################################
## 提取RNA的质控过的细胞画图
use_cell <- scrnat$cell[!(scrnat$cell %in% cluster15_cells_rna)]
scrnat_qc <- subset( scrnat , cell %in% use_cell )
scrnat <- scrnat_qc

## 重新聚类
scrnat <- RunUMAP(scrnat, dims = 1:10)

## 画聚类图 ##
out_file <- paste0( out_path , "/magic3_cluster.qc.pdf" ) 
pdf(file=out_file,width=8,heigh=6)
DimPlot(scrnat, reduction = "umap", label = TRUE , cols = use_colors , group.by = 'cell_type' )
DimPlot(scrnat, reduction = "umap", label = TRUE, group.by = 'batch' )
#DimPlot(scrnat, reduction = "umap", label = TRUE, group.by = 'seurat_clusters' )
dev.off()

## marker基因 ##
out_file <- paste0( out_path , "/magic3_cluster_marker.qc.pdf" ) 
pdf(file=out_file,width=12,height=10)
FeaturePlot(scrnat, features = c(
"UTF1", "KIT", "STRA8", 
"SPO11", "SYCP3", "OVOL2", 
"NME8" , "TXNDC8" ,"TNP1" , 
"PRM1" ,"AMH", "DLK1",
"MYH11","NOTCH3","CD14",
"VWF","NKG7" ,"FGFBP2"))  
dev.off()

## 存储最后用到的
image_name <- paste0( out_path , "/testis_combined.annotationCellType.qc.Rdata")
save(  scrnat , file = image_name )
